{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from keras.layers import Dense, Flatten, Dropout, Input, BatchNormalization\n",
    "from keras.utils import np_utils\n",
    "from keras.models import Sequential, Model\n",
    "from keras import regularizers\n",
    "import torch\n",
    "from keras import optimizers\n",
    "from keras.callbacks import ModelCheckpoint\n",
    "from keras.layers.merge import concatenate\n",
    "from keras import initializers\n",
    "from keras.utils import to_categorical\n",
    "from keras.layers.advanced_activations import LeakyReLU, PReLU\n",
    "import copy\n",
    "\n",
    "import os\n",
    "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\"\n",
    "import threading\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\"\n",
    "import pickle\n",
    "import gc\n",
    "\n",
    "import keras\n",
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "# Load the precomputed train and test embedding objects from disk.\n",
    "# NOTE(review): pickle.load can execute arbitrary code while unpickling, so these\n",
    "# files must come from a trusted source.\n",
    "with open('/media/data_dump_2/Shivangi/aaai/gossip/finalTrainEmbeddings.pkl', 'rb') as f:\n",
    "    trainEmbeddings = pickle.load(f)\n",
    "with open('/media/data_dump_2/Shivangi/aaai/gossip/finalTestEmbeddings.pkl', 'rb') as f:\n",
    "    testEmbeddings = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "# Load the train and test metadata. Later cells read element 0 of each entry as the\n",
    "# label and the 'Top_img' field of its last element to find the matching image.\n",
    "with open('/media/data_dump_2/Shivangi/aaai/imageDataset/gossip/trainJson.json', 'r') as f:\n",
    "    trainData = json.load(f)\n",
    "with open('/media/data_dump_2/Shivangi/aaai/imageDataset/gossip/testJson.json', 'r') as f:\n",
    "    testData = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Force every training entry into a fixed (50, 768) float32 matrix:\n",
    "# entries with 50 or more rows are truncated, shorter ones are zero-padded.\n",
    "for key in trainEmbeddings:\n",
    "    padded = np.zeros((50, 768), dtype=np.float32)\n",
    "    # The slice caps the sequence at 50 rows; rows that are never written stay zero.\n",
    "    for row, vector in enumerate(trainEmbeddings[key][0:50]):\n",
    "        padded[row][:] = vector\n",
    "    trainEmbeddings[key] = padded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the train and test image-feature mappings. Later cells iterate over their keys\n",
    "# (image file names) and index them with '<Top_img>.jpg'.\n",
    "# NOTE(review): presumably VGG features, judging by the variable names - confirm.\n",
    "# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.\n",
    "with open('/media/data_dump_2/Shivangi/aaai/gossip/train_gossip.pickle', 'rb') as f:\n",
    "    train_vgg_poli = pickle.load(f)\n",
    "with open('/media/data_dump_2/Shivangi/aaai/gossip/test_gossip.pickle', 'rb') as f:\n",
    "    test_vgg_poli = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Force every test entry into a fixed (50, 768) float32 matrix:\n",
    "# entries with 50 or more rows are truncated, shorter ones are zero-padded.\n",
    "for key in testEmbeddings:\n",
    "    padded = np.zeros((50, 768), dtype=np.float32)\n",
    "    # The slice caps the sequence at 50 rows; rows that are never written stay zero.\n",
    "    for row, vector in enumerate(testEmbeddings[key][0:50]):\n",
    "        padded[row][:] = vector\n",
    "    testEmbeddings[key] = padded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_text = []\n",
    "train_label = []\n",
    "test_text = []\n",
    "test_label = []\n",
    "train_image = []\n",
    "test_image = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pair each image feature with the text embedding and label of every article whose\n",
    "# 'Top_img' field equals the image file stem. Articles are indexed by 'Top_img' once,\n",
    "# so each image costs a dictionary lookup instead of a scan over all of trainData.\n",
    "train_text, train_image, train_label = [], [], []  # reset so a re-run does not append duplicates\n",
    "\n",
    "train_keys_by_img = {}\n",
    "for j in trainData:\n",
    "    train_keys_by_img.setdefault(trainData[j][-1]['Top_img'], []).append(j)\n",
    "\n",
    "for i in train_vgg_poli:\n",
    "    stem = i.split('.jpg')[0]\n",
    "    # Matches are visited in trainData order, the same order a full scan would produce.\n",
    "    for j in train_keys_by_img.get(stem, ()):\n",
    "        if j in trainEmbeddings:\n",
    "            train_text.append(trainEmbeddings[j])\n",
    "            train_image.append(train_vgg_poli[stem + '.jpg'])\n",
    "            train_label.append(trainData[j][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "testEmbedOrder = []\n",
    "testImageOrder = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pair each image feature with the text embedding and label of every article whose\n",
    "# 'Top_img' field equals the image file stem, and record which embedding key and image\n",
    "# key produced each sample. Articles are indexed by 'Top_img' once, so each image costs\n",
    "# a dictionary lookup instead of a scan over all of testData.\n",
    "test_text, test_image, test_label = [], [], []  # reset so a re-run does not append duplicates\n",
    "testEmbedOrder, testImageOrder = [], []\n",
    "\n",
    "test_keys_by_img = {}\n",
    "for j in testData:\n",
    "    test_keys_by_img.setdefault(testData[j][-1]['Top_img'], []).append(j)\n",
    "\n",
    "for i in test_vgg_poli:\n",
    "    stem = i.split('.jpg')[0]\n",
    "    # Matches are visited in testData order, the same order a full scan would produce.\n",
    "    for j in test_keys_by_img.get(stem, ()):\n",
    "        if j in testEmbeddings:\n",
    "            test_text.append(testEmbeddings[j])\n",
    "            test_image.append(test_vgg_poli[stem + '.jpg'])\n",
    "            test_label.append(testData[j][0])\n",
    "            testEmbedOrder.append(j)\n",
    "            testImageOrder.append(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2458"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "2458"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10010, 10010, 10010)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(train_text), len(train_image), len(train_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2830, 2830, 2830, 2830, 2830)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(test_text), len(test_image), len(test_label), len(testEmbedOrder), len(testImageOrder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10259 2581\n"
     ]
    }
   ],
   "source": [
    "# Tally labels equal to 1 and labels equal to 0 over the train and test splits combined.\n",
    "all_labels = list(train_label) + list(test_label)\n",
    "realCount = sum(1 for label in all_labels if label == 1)\n",
    "fakeCount = sum(1 for label in all_labels if label == 0)\n",
    "print(realCount, fakeCount)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep an independent copy of the test labels under a separate name so the original\n",
    "# label values stay available if test_label is later replaced by an encoded version.\n",
    "discreteTest = copy.deepcopy(test_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace both label collections with the output of Keras to_categorical.\n",
    "# The results are stored under the same names, so re-running this cell would encode\n",
    "# the already-encoded arrays a second time.\n",
    "train_label = to_categorical(train_label)\n",
    "test_label = to_categorical(test_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1, 4096)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"\"\"train_text=[torch.Tensor.numpy(i.cpu()) for i in train_text]\n",
    "test_text=[torch.Tensor.numpy(i.cpu()) for i in test_text]\"\"\"\n",
    "train_image[0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uninitialised buffer with one (50, 768) slot per training sample; filled in the next cell.\n",
    "train_text_matrix = np.empty((len(train_text), 50, 768))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy each training embedding into its own slot of the preallocated tensor.\n",
    "for row, embedding in enumerate(train_text):\n",
    "    train_text_matrix[row, :, :] = embedding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uninitialised buffer with one (50, 768) slot per test sample; filled in the next cell.\n",
    "test_text_matrix = np.empty((len(test_text), 50, 768))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy each test embedding into its own slot of the preallocated tensor.\n",
    "for row, embedding in enumerate(test_text):\n",
    "    test_text_matrix[row, :, :] = embedding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2830, 50, 768)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_text_matrix.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1, 4096)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_image[0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uninitialised buffer with one (4096, 1) slot per training image; filled in the next cell.\n",
    "train_image_matrix = np.empty((len(train_image), 4096, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reshape each training image feature to a (4096, 1) column and store it in its slot.\n",
    "for row, feature in enumerate(train_image):\n",
    "    train_image_matrix[row, :, :] = feature.reshape(4096, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uninitialised buffer with one (4096, 1) slot per test image; filled in the next cell.\n",
    "test_image_matrix = np.empty((len(test_image), 4096, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reshape each test image feature to a (4096, 1) column and store it in its slot.\n",
    "for row, feature in enumerate(test_image):\n",
    "    test_image_matrix[row, :, :] = feature.reshape(4096, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10010"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(train_image_matrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collapse both image tensors to 2-D with 4096 columns; the row count is inferred\n",
    "# from the array size.\n",
    "train_image_matrix = train_image_matrix.reshape(-1, 4096)\n",
    "test_image_matrix = test_image_matrix.reshape(-1, 4096)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inference of the text-only model (compared with saved MM-model predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.models import load_model\n",
    "model = load_model('checkpoints_gossip/dense_Text_model.hdf5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the loaded model on the test text tensor and keep, for each sample, the index\n",
    "# of the highest-scoring output.\n",
    "predictions = np.argmax(model.predict(test_text_matrix), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One record per test sample: [embedding key, image key, original label, predicted class].\n",
    "data.extend(\n",
    "    [testEmbedOrder[k], testImageOrder[k], discreteTest[k], predictions[k]]\n",
    "    for k in range(len(test_label))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pandas import DataFrame\n",
    "df = DataFrame.from_records(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>JR Smith's Daughter Released from NICU After B...</td>\n",
       "      <td>01r40J3oClyYJCLDrlzSYJ5c69lHOMux.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Sofia Richie Has a Scott Disick Phone Case and...</td>\n",
       "      <td>01whK2ttpeODIWZrvYFkowXXjyaDrZPs.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>Everything We Know About Brad Pitt’s Plane Inc...</td>\n",
       "      <td>02s3CNoDOz8F0oeX3tA74g6CRpmxHnvc.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>Kate Middleton Secretly Donated Her 7 Inches o...</td>\n",
       "      <td>05LFsMa2ZHO6z2GLQzaLeI8YBxCkFufq.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>General Gabbery: DWTS Or sign in with one of t...</td>\n",
       "      <td>07bXihvPVrtYL6NWtmBLRocPl6kCtGQr.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2825</td>\n",
       "      <td>Francesca Eastwood Is Pregnant! Daughter of Cl...</td>\n",
       "      <td>A0w05t3M4mDp1k78mLZxmpgA737vbwgs.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2826</td>\n",
       "      <td>Stevie Ryan Stevie Kathleen Ryan (June 2, 1984...</td>\n",
       "      <td>a0wF3r18U9Pe2YEJC9S1BBQgsHmCiDdu.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2827</td>\n",
       "      <td>Oprah tried her first Auntie Anne's pretzel an...</td>\n",
       "      <td>A38RDaGDi7RGDYpzTDfT1WIZYtj6gh1K.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2828</td>\n",
       "      <td>Demi Lovato on Her Wild Party with Snoop Dogg:...</td>\n",
       "      <td>a3fQ8ZsAZ0YRefUhOeGchznK5vJbm4VW.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2829</td>\n",
       "      <td>Catelynn &amp; Tyler Baltierra Celebrate Carly's 9...</td>\n",
       "      <td>A3oxmxQiry3QntbZ5XZg5bWEBnHiVvCa.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2830 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      0  \\\n",
       "0     JR Smith's Daughter Released from NICU After B...   \n",
       "1     Sofia Richie Has a Scott Disick Phone Case and...   \n",
       "2     Everything We Know About Brad Pitt’s Plane Inc...   \n",
       "3     Kate Middleton Secretly Donated Her 7 Inches o...   \n",
       "4     General Gabbery: DWTS Or sign in with one of t...   \n",
       "...                                                 ...   \n",
       "2825  Francesca Eastwood Is Pregnant! Daughter of Cl...   \n",
       "2826  Stevie Ryan Stevie Kathleen Ryan (June 2, 1984...   \n",
       "2827  Oprah tried her first Auntie Anne's pretzel an...   \n",
       "2828  Demi Lovato on Her Wild Party with Snoop Dogg:...   \n",
       "2829  Catelynn & Tyler Baltierra Celebrate Carly's 9...   \n",
       "\n",
       "                                         1  2  3  \n",
       "0     01r40J3oClyYJCLDrlzSYJ5c69lHOMux.jpg  1  1  \n",
       "1     01whK2ttpeODIWZrvYFkowXXjyaDrZPs.jpg  1  1  \n",
       "2     02s3CNoDOz8F0oeX3tA74g6CRpmxHnvc.jpg  0  0  \n",
       "3     05LFsMa2ZHO6z2GLQzaLeI8YBxCkFufq.jpg  1  1  \n",
       "4     07bXihvPVrtYL6NWtmBLRocPl6kCtGQr.jpg  1  1  \n",
       "...                                    ... .. ..  \n",
       "2825  A0w05t3M4mDp1k78mLZxmpgA737vbwgs.jpg  1  1  \n",
       "2826  a0wF3r18U9Pe2YEJC9S1BBQgsHmCiDdu.jpg  1  1  \n",
       "2827  A38RDaGDi7RGDYpzTDfT1WIZYtj6gh1K.jpg  1  1  \n",
       "2828  a3fQ8ZsAZ0YRefUhOeGchznK5vJbm4VW.jpg  1  1  \n",
       "2829  A3oxmxQiry3QntbZ5XZg5bWEBnHiVvCa.jpg  1  1  \n",
       "\n",
       "[2830 rows x 4 columns]"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the predictions table as a tab-separated file, then give the columns readable names.\n",
    "# NOTE(review): the file is written before the rename on the next line, so its header row\n",
    "# carries whatever column labels df had at that point, not the names below - confirm this is intended.\n",
    "df.to_csv('gossip_text.csv', sep='\\t')\n",
    "df.columns = [\"Text\", \"image\" ,\"true\", \"predicted\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Kim Kardashian, Kanye West Snubbed By Kate Middleton And Prince William Report Debunked Kate Middleton and Prince William are not spared from rumors and malicious reports despite their stature. But the rumors of them snubbing Kim Kardashian and Kanye West was debunked. (Photo: Chris Jackson/Getty Images)\\n\\nKate Middleton and Prince William despite their stature in the world are not spared from rumors and unfounded reports that put the royal couple in a bad light. One of them is the report that they snubbed the power couple Kim Kardashian and Kanye West.\\n\\nThe report said that Kardashian feels that Middleton and Prince William are snubbing or disinterested in meeting her and her husband. The royal couple allegedly turned down the invitation to attend West\\'s concert a couple of times. The \"Keeping up with the Kardashians\" star feels that it is snobbery towards her.\\n\\nThe mother of two was not in good terms with Beyonce. She knew that the royal couple met with her during a Nets game with her husband Jay Z. Kardashian feels that Beyonce has an edge over her. Kim Kardashian was allegedly hurt especially that Middleton is her idol.\\n\\nGossip Cop debunked the above-mentioned report as fake news because it was not from a reliable source. Knowing the Duke and the Duchess of Cambridge are busy people, missing an invitation is unlikely but if not done beforehand, the possibility is that it cannot be accommodated into their schedules.\\n\\nThe Siver Times reiterated a report that the royal couple saw Kim Kardashian and her husband as a has-been. In 2015, a report has mentioned that Kim and her husband has been inviting the royal couple but were turned down again and again.\\n\\nKim sent Princess Charlotte a custom-made dress from Kanye\\'s clothing line but it was returned. One time, when Kim heard that Middleton was coming to the White House, she reportedly asked then First Lady Michelle Obama to have them met. 
But the First Lady declined.\\n\\nWhether Kate Middleton does not want a friendship with Kim Kardashian and Kanye West is not known but the Duchess snubbing them for no reason is not true as debunked by Gossip Cop.\\n\\nSee Now: The U.S. had the highest number of Most Wanted properties, dominating the Hotels.com Loved By Guests Awards 2018'"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[9]['Text']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Text</th>\n",
       "      <th>image</th>\n",
       "      <th>true</th>\n",
       "      <th>predicted</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>Kim Kardashian, Kanye West Snubbed By Kate Mid...</td>\n",
       "      <td>0CmVkvav787PZg99LAOi4w1gO7STt95a.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>18</td>\n",
       "      <td>Charlie Sheen says he is HIV-positive (CNN) Ac...</td>\n",
       "      <td>MTSvnGpgyCRAxMysfxPE0ZV7CZapnc0R.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>21</td>\n",
       "      <td>Duchess Meghan and Prince Harry Are Planning a...</td>\n",
       "      <td>MUvYxKV9Qj7zpDuFObuvSSLt6JVO8pXn.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>29</td>\n",
       "      <td>‘Fixer Upper’ Gets November Premiere Date For ...</td>\n",
       "      <td>mW6itzHKfU990NTOLG5KikJG0j7ml6FX.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>30</td>\n",
       "      <td>Patrick Dempsey: Why I Called Off My Divorce t...</td>\n",
       "      <td>etRcWhpIZZ2SARMBXfSEXZQfDvwUguB2.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2796</td>\n",
       "      <td>Kim Kardashian Having Plastic Surgery To Cure ...</td>\n",
       "      <td>yjT2Y9mBErC1UubMFJYo1MtZSRnjKU0I.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2799</td>\n",
       "      <td>Kelly and Ryan’s relationship may not be as su...</td>\n",
       "      <td>yKHF16O9pcHS3zdoCIPqt85dr37VbJtu.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2810</td>\n",
       "      <td>Christina Aguilera Joining ‘American Idol’?: A...</td>\n",
       "      <td>JzJ3HynrlaxsnQ74U8qrFp9fMVOVFsjv.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2814</td>\n",
       "      <td>Jennifer Aniston Plastic Surgery- Celebrity Co...</td>\n",
       "      <td>LQXtiIkANvlfUUlpFmE9zZ0yBhhqpDlg.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2815</td>\n",
       "      <td>Robert Pattinson and FKA twigs Might Not Marry...</td>\n",
       "      <td>Lw3FNiAoy5GdIJ4DVO39YbQXlFljZ0gB.jpg</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>442 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   Text  \\\n",
       "9     Kim Kardashian, Kanye West Snubbed By Kate Mid...   \n",
       "18    Charlie Sheen says he is HIV-positive (CNN) Ac...   \n",
       "21    Duchess Meghan and Prince Harry Are Planning a...   \n",
       "29    ‘Fixer Upper’ Gets November Premiere Date For ...   \n",
       "30    Patrick Dempsey: Why I Called Off My Divorce t...   \n",
       "...                                                 ...   \n",
       "2796  Kim Kardashian Having Plastic Surgery To Cure ...   \n",
       "2799  Kelly and Ryan’s relationship may not be as su...   \n",
       "2810  Christina Aguilera Joining ‘American Idol’?: A...   \n",
       "2814  Jennifer Aniston Plastic Surgery- Celebrity Co...   \n",
       "2815  Robert Pattinson and FKA twigs Might Not Marry...   \n",
       "\n",
       "                                     image  true  predicted  \n",
       "9     0CmVkvav787PZg99LAOi4w1gO7STt95a.jpg     0          1  \n",
       "18    MTSvnGpgyCRAxMysfxPE0ZV7CZapnc0R.jpg     0          1  \n",
       "21    MUvYxKV9Qj7zpDuFObuvSSLt6JVO8pXn.jpg     0          1  \n",
       "29    mW6itzHKfU990NTOLG5KikJG0j7ml6FX.jpg     0          1  \n",
       "30    etRcWhpIZZ2SARMBXfSEXZQfDvwUguB2.jpg     0          1  \n",
       "...                                    ...   ...        ...  \n",
       "2796  yjT2Y9mBErC1UubMFJYo1MtZSRnjKU0I.jpg     0          1  \n",
       "2799  yKHF16O9pcHS3zdoCIPqt85dr37VbJtu.jpg     0          1  \n",
       "2810  JzJ3HynrlaxsnQ74U8qrFp9fMVOVFsjv.jpg     0          1  \n",
       "2814  LQXtiIkANvlfUUlpFmE9zZ0yBhhqpDlg.jpg     0          1  \n",
       "2815  Lw3FNiAoy5GdIJ4DVO39YbQXlFljZ0gB.jpg     0          1  \n",
       "\n",
       "[442 rows x 4 columns]"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[df['true'] < df['predicted']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df_mm = pd.read_csv('gossip_mm.csv', sep='\\t')\n",
    "df_mm.columns = [\"Index\", \"Text\", \"image\" ,\"true\", \"predicted\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index                                                     2815\n",
       "Text         Robert Pattinson and FKA twigs Might Not Marry...\n",
       "image                     Lw3FNiAoy5GdIJ4DVO39YbQXlFljZ0gB.jpg\n",
       "true                                                         0\n",
       "predicted                                                    0\n",
       "Name: 2815, dtype: object"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#df_mm.loc[df_mm['true'] > df_mm['predicted']]\n",
    "df_mm.loc[2815]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  Indices for 1-1: 2157, 2458,2558"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (BERT)",
   "language": "python",
   "name": "bert"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
